Chapter 10 Source of functional differences

10.1 Gut microbiome

load("data/gut/data.Rdata")

environment_prevalence <- genome_counts_filt %>% 
  pivot_longer(!genome,names_to = "sample",values_to = "counts") %>% 
  filter(sample!="EHI02625") %>% 
  left_join(sample_metadata,by="sample") %>% 
  group_by(environment) %>% 
  mutate(sample_size = n_distinct(sample), .groups = "drop") %>% 
  group_by(genome,environment) %>%
  summarise(presence = sum(counts > 0, na.rm = TRUE), 
            sample_size = mean(sample_size),
            mean = mean(counts),
            river = n_distinct(river[counts > 0]),
            .groups = "drop") %>% 
  mutate(prevalence=presence/sample_size)

# Must be present in at least 20% of samples
core_microbiota_prevalence <- environment_prevalence %>% 
  select(genome,environment,prevalence) %>% 
  pivot_wider(names_from = "environment",values_from="prevalence") %>% 
  mutate(type_prevalence = case_when(
      high > 0 & low > 0 ~ "core",
      high == 0 & low > 0.2 ~ "endemic",
      high >= 0.2 & low == 0 ~ "endemic",
      high == 0 & low <= 0.2 ~ "marginal",
      high <= 0.2 & low == 0 ~ "marginal",
      high == 0 & low == 0 ~ "absent"
    )) %>% 
  mutate(type_prevalence_environment = case_when(
      high > 0 & low > 0 ~ "core",
      high == 0 & low > 0.2 ~ "low",
      high >= 0.2 & low == 0 ~ "high",
      high == 0 & low <= 0.2 ~ "low",
      high <= 0.2 & low == 0 ~ "high",
      high == 0 & low == 0 ~ "absent"
    ))

# Must be present in both rivers
core_microbiota_river <- environment_prevalence %>% 
  select(genome,environment,river) %>% 
  pivot_wider(names_from = "environment",values_from="river") %>% 
  mutate(type_river = case_when(
      high >= 1 & low >= 1 ~ "core",
      high == 0 & low == 2 ~ "endemic",
      high == 2 & low == 0 ~ "endemic",
      high == 0 & low >= 1 ~ "marginal",
      high >= 1 & low == 0 ~ "marginal",
      high == 0 & low == 0 ~ "absent"
    )) %>% 
  mutate(type_river_environment = case_when(
      high >= 1 & low >= 1 ~ "core",
      high == 0 & low == 2 ~ "low",
      high == 2 & low == 0 ~ "high",
      high == 0 & low >= 1 ~ "low",
      high >= 1 & low == 0 ~ "high",
      high == 0 & low == 0 ~ "absent"
    ))

# Merge both criteria
core_microbiota <- inner_join(core_microbiota_prevalence %>% select(genome,type_prevalence,type_prevalence_environment),core_microbiota_river %>% select(genome,type_river),by="genome") %>% 
  mutate(match = ifelse(type_prevalence == type_river, 1, 0)) %>% 
  mutate(type= case_when(
      type_prevalence == "core" & type_river == "core" ~ "core",
      type_prevalence == "endemic" & type_river == "endemic" ~ "endemic",
      type_prevalence == "marginal" & type_river == "endemic" ~ "marginal",
      type_prevalence == "endemic" & type_river == "marginal" ~ "marginal",
      type_prevalence == "marginal" & type_river == "marginal" ~ "marginal",
      type_prevalence == "absent" & type_river == "absent" ~ "absent",
    )
  )
core_microbiota %>% 
  group_by(type_river) %>% 
  summarise(count=n())
# A tibble: 4 × 2
  type_river count
  <chr>      <int>
1 absent         3
2 core         389
3 endemic       99
4 marginal      48
environment_prevalence %>% 
  filter(river==2,
         prevalence==1) %>%
  select(genome) %>%
  unique() %>% 
  write_tsv("data/microdiversity/core.txt")
environment_prevalence %>%
  group_by(genome) %>%
  filter(any(environment == "high" & prevalence >0.9)) %>%
  filter(!any(environment == "low" & prevalence > 0)) %>%
  ungroup() %>%
  select(genome) %>%
  unique() %>% 
  write_tsv("data/microdiversity/high.txt")
environment_prevalence %>%
  group_by(genome) %>%
  filter(any(environment == "low" & prevalence > 0.7)) %>%
  filter(!any(environment == "high" & prevalence > 0)) %>%
  ungroup() %>%
  select(genome) %>%
  unique() %>% 
  write_tsv("data/microdiversity/low.txt")
partitioning_heatmap <- core_microbiota %>% 
  select(genome,type) %>% 
  column_to_rownames("genome")

# Generate  basal tree
partitioning_tree <- force.ultrametric(genome_tree, method="extend") %>%
                ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
function_tree <- gheatmap(partitioning_tree, partitioning_heatmap, offset=0, width=0.1, colnames=FALSE)

10.1.1 Genome size

core_microbiota %>% 
  left_join(genome_metadata,by="genome") %>% 
  group_by(type) %>% 
  summarise(genome_size=mean(length))
# A tibble: 4 × 2
  type     genome_size
  <chr>          <dbl>
1 core        2835369.
2 endemic     2407507.
3 marginal    2396801.
4 <NA>        2030171 
core_microbiota %>% 
  left_join(genome_metadata,by="genome") %>% 
  pairwise_wilcox_test(length ~ type, p.adjust.method = "BH")
# A tibble: 3 × 9
  .y.    group1  group2      n1    n2 statistic          p    p.adj p.adj.signif
* <chr>  <chr>   <chr>    <int> <int>     <dbl>      <dbl>    <dbl> <chr>       
1 length core    endemic    389    85     21740 0.00000533 0.000016 ****        
2 length core    marginal   389    62     15766 0.000101   0.000152 ***         
3 length endemic marginal    85    62      2738 0.688      0.688    ns          
core_microbiota %>% 
  left_join(genome_metadata,by="genome") %>% 
  ggplot(aes(x=type,y=length,group=type)) +
    geom_boxplot() +
    geom_jitter()

10.1.2 Taxonomic variation

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  group_by(type,sample) %>% 
  summarise(fraction=sum(count)) %>% 
  group_by(type) %>% 
  summarise(mean(fraction))
`summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
# A tibble: 4 × 2
  type     `mean(fraction)`
  <chr>               <dbl>
1 core               0.870 
2 endemic            0.0882
3 marginal           0.0407
4 <NA>               0.0746

10.1.3 Functional differences between fractions

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>%
    ggplot(aes(x=type, y=value, group=type))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type, p.adjust.method = "BH")
# A tibble: 3 × 10
  trait .y.   group1  group2      n1    n2 statistic          p      p.adj p.adj.signif
* <chr> <chr> <chr>   <chr>    <int> <int>     <dbl>      <dbl>      <dbl> <chr>       
1 mci   value core    endemic    389    85     21482 0.0000152  0.0000228  ****        
2 mci   value core    marginal   389    62     16678 0.00000126 0.00000378 ****        
3 mci   value endemic marginal    85    62      2903 0.294      0.294      ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type, p.adjust.method = "BH")
# A tibble: 6 × 10
  trait        .y.   group1  group2      n1    n2 statistic           p       p.adj p.adj.signif
* <chr>        <chr> <chr>   <chr>    <int> <int>     <dbl>       <dbl>       <dbl> <chr>       
1 Biosynthesis value core    endemic    389    85    21545  0.0000118   0.0000177   ****        
2 Biosynthesis value core    marginal   389    62    17034. 0.000000179 0.000000537 ****        
3 Biosynthesis value endemic marginal    85    62     2986  0.169       0.169       ns          
4 Degradation  value core    endemic    389    85    20437  0.000644    0.002       **          
5 Degradation  value core    marginal   389    62    15153  0.001       0.002       **          
6 Degradation  value endemic marginal    85    62     2687  0.84        0.84        ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>% 
    ggplot(aes(x=type, y=value, group=type))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type, p.adjust.method = "BH") %>% 
    print(n=100)
# A tibble: 42 × 10
   trait .y.   group1  group2      n1    n2 statistic           p       p.adj p.adj.signif
 * <chr> <chr> <chr>   <chr>    <int> <int>     <dbl>       <dbl>       <dbl> <chr>       
 1 B01   value core    endemic    389    85    20668. 0.000301    0.000451    ***         
 2 B01   value core    marginal   389    62    16226. 0.0000123   0.0000369   ****        
 3 B01   value endemic marginal    85    62     2914. 0.274       0.274       ns          
 4 B02   value core    endemic    389    85    21470. 0.0000159   0.0000238   ****        
 5 B02   value core    marginal   389    62    16529  0.00000274  0.00000822  ****        
 6 B02   value endemic marginal    85    62     2752. 0.649       0.649       ns          
 7 B03   value core    endemic    389    85    18336. 0.112       0.168       ns          
 8 B03   value core    marginal   389    62    13706. 0.081       0.168       ns          
 9 B03   value endemic marginal    85    62     2744. 0.666       0.666       ns          
10 B04   value core    endemic    389    85    16912. 0.74        0.74        ns          
11 B04   value core    marginal   389    62    15036  0.002       0.005       **          
12 B04   value endemic marginal    85    62     3186. 0.03        0.046       *           
13 B06   value core    endemic    389    85    22249  0.000000582 0.000000873 ****        
14 B06   value core    marginal   389    62    17100  0.000000123 0.000000369 ****        
15 B06   value endemic marginal    85    62     2915  0.273       0.273       ns          
16 B07   value core    endemic    389    85    22042  0.00000147  0.00000441  ****        
17 B07   value core    marginal   389    62    16465  0.0000038   0.0000057   ****        
18 B07   value endemic marginal    85    62     2798. 0.525       0.525       ns          
19 B08   value core    endemic    389    85    20472. 0.000563    0.000844    ***         
20 B08   value core    marginal   389    62    16182. 0.0000148   0.0000444   ****        
21 B08   value endemic marginal    85    62     3007  0.145       0.145       ns          
22 D01   value core    endemic    389    85    19124. 0.016       0.047       *           
23 D01   value core    marginal   389    62    12628. 0.527       0.527       ns          
24 D01   value endemic marginal    85    62     2368  0.238       0.357       ns          
25 D02   value core    endemic    389    85    21914. 0.00000254  0.00000762  ****        
26 D02   value core    marginal   389    62    15312. 0.000644    0.000966    ***         
27 D02   value endemic marginal    85    62     2430  0.422       0.422       ns          
28 D03   value core    endemic    389    85    19320. 0.015       0.022       *           
29 D03   value core    marginal   389    62    14923  0.003       0.008       **          
30 D03   value endemic marginal    85    62     2743  0.673       0.673       ns          
31 D05   value core    endemic    389    85    20434. 0.000649    0.000974    ***         
32 D05   value core    marginal   389    62    15667  0.000154    0.000462    ***         
33 D05   value endemic marginal    85    62     2868. 0.363       0.363       ns          
34 D06   value core    endemic    389    85    19654  0.006       0.018       *           
35 D06   value core    marginal   389    62    12642. 0.538       0.538       ns          
36 D06   value endemic marginal    85    62     2322  0.219       0.328       ns          
37 D07   value core    endemic    389    85    16382  0.895       0.972       ns          
38 D07   value core    marginal   389    62    12092. 0.972       0.972       ns          
39 D07   value endemic marginal    85    62     2690. 0.828       0.972       ns          
40 D09   value core    endemic    389    85    17899  0.225       0.225       ns          
41 D09   value core    marginal   389    62    15196. 0.000804    0.002       **          
42 D09   value endemic marginal    85    62     3028. 0.117       0.176       ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>% 
    ggplot(aes(x=type, y=value, group=type))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

10.1.4 Functional differences between high and low endemisms

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment)) %>%
    ggplot(aes(x=type_prevalence_environment, y=value, group=type_prevalence_environment))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment)) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH")
# A tibble: 1 × 10
  trait .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
* <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
1 mci   value high   low       47    38       552 0.002 0.002 **          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    ggplot(aes(x=type_prevalence_environment, y=value, group=type_prevalence_environment))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH") %>% 
    print(n=100)
# A tibble: 14 × 10
   trait .y.   group1 group2    n1    n2 statistic        p    p.adj p.adj.signif
 * <chr> <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl> <chr>       
 1 B01   value high   low       47    38      814. 0.485    0.485    ns          
 2 B02   value high   low       47    38      588. 0.007    0.007    **          
 3 B03   value high   low       47    38      668  0.045    0.045    *           
 4 B04   value high   low       47    38      684  0.065    0.065    ns          
 5 B06   value high   low       47    38      635  0.023    0.023    *           
 6 B07   value high   low       47    38      639  0.025    0.025    *           
 7 B08   value high   low       47    38      740. 0.179    0.179    ns          
 8 D01   value high   low       47    38      750. 0.143    0.143    ns          
 9 D02   value high   low       47    38      614  0.014    0.014    *           
10 D03   value high   low       47    38      592  0.008    0.008    **          
11 D05   value high   low       47    38      637  0.024    0.024    *           
12 D06   value high   low       47    38      682. 0.061    0.061    ns          
13 D07   value high   low       47    38      660. 0.04     0.04     *           
14 D09   value high   low       47    38      523  0.000929 0.000929 ***         

10.1.5 MAGs in different locations and shared among locations

locationcolors=c('#c4d7d1','#408892','#2d3749','#c04062','#6b3a59','#e08683')
locationcolors=c('#c4d7d1','#408892','#c04062','#e08683')

genome_counts_rel <- genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  column_to_rownames(., "genome")

genome_counts_rel_fil<- genome_counts_filt%>% 
    select(one_of(c("genome",sample_metadata$sample))) %>% 
  column_to_rownames(., "genome")
  
genome_counts_rel_pa=1*(genome_counts_rel_fil>0)
#MAGrel_pa[1:6,1:6]
table_upset_analysis_cont=t(aggregate(t(genome_counts_rel_pa),by=list(sample_metadata$river),FUN=sum)[,-1])
colnames(table_upset_analysis_cont)=levels(as.factor(sample_metadata$river))
table_upset_analysis=(table_upset_analysis_cont>0)*1
table_upset_analysis=data.frame(table_upset_analysis)
table_upset_analysis=apply(table_upset_analysis,2,as.integer)
rownames(table_upset_analysis) <- rownames(genome_counts_rel_pa)

#pdf("figures/MAG_intersection.pdf",width=8,height=6, onefile=F)
upset(as.data.frame(table_upset_analysis),
  keep.order = T,
  sets = rev(c("Erlan","Harpea","Leitzaran","Goizueta")),
  sets.bar.color= rev(locationcolors),
  mb.ratio = c(0.55, 0.45), order.by = "freq")

#dev.off()

10.1.5.1 Core

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="core") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.1.5.2 Endemic

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="endemic") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    ylim(0,1)+
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.1.5.3 Marginal

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="marginal") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    ylim(0,1)+
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.1.5.4 Ordination

structural <- core_microbiota %>% 
  mutate(type = case_when(
      (type_prevalence == "endemic") & (type_prevalence_environment == "high") ~ "present_high",
       (type_prevalence == "endemic") & (type_prevalence_environment == "low") ~ "present_low")) %>% 
    filter(!is.na(type)) %>% 
    select(genome,type)

enriched <- ancom_mag_gut_table %>% 
  mutate(type = case_when(
      lfc_environmentlow < 0 ~ "enriched_low",
      lfc_environmentlow >= 0 ~ "enriched_high")) %>% 
  select(genome,type)


scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(bind_rows(structural,enriched), by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  mutate(type=ifelse(is.na(type),"neutral",type)) %>%
  group_by(type) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      scale_size_continuous(range = c(0.1,5)) +
      geom_point(aes(x=Axis.1,y=Axis.2, color=type, size=length), alpha=0.9, shape=16) +
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=type), alpha = 0.5) +
      scale_color_manual(values=c("#5A99D2","#F16144","#B7B7B7","#225072","#8C2C1C"))+
    theme_minimal() + 
    theme(legend.position = "none")
Warning: Removed 40 rows containing missing values or values outside the scale range (`geom_point()`).

scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(core_microbiota, by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  filter(type != "absent") %>% 
  mutate(shape = case_when(
      type == "core" ~ "core",
      type == "marginal" ~ "marginal",
      type == "endemic" & type_prevalence_environment == "high" ~ "endemic_high",
      type == "endemic" & type_prevalence_environment == "low" ~ "endemic_low"
    )) %>% 
  group_by(shape) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      #genome positions
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=shape), alpha = 0.5, show.legend = FALSE) +
      scale_color_manual(values=c("#9E8F71","#5A99D2","#F16144","#B7B7B7"))+
      theme_minimal() 

   # theme(legend.position = "none")

10.1.6 Functional variation

10.1.6.1 Core

#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

core_genomes <- core_microbiota %>% filter(type=="core") %>% pull(genome)
endemic_genomes <- core_microbiota %>% filter(type=="endemic") %>% pull(genome)
marginal_genomes <- core_microbiota %>% filter(type=="marginal") %>% pull(genome)

genome_counts_core <- genome_counts_filt %>%
  filter(genome %in% core_genomes) %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  column_to_rownames(., "genome") %>%
  select_if(~ !all(. == 0)) %>%
  filter(rowSums(.) != 0)        

genome_counts_endemic <- genome_counts_filt %>%
  filter(genome %in% endemic_genomes) %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  column_to_rownames(., "genome") %>%
  select_if(~ !all(. == 0)) %>%
  filter(rowSums(.) != 0)        

genome_counts_marginal <- genome_counts_filt %>%
  filter(genome %in% marginal_genomes) %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  column_to_rownames(., "genome") %>%
  select_if(~ !all(. == 0)) %>%
  filter(rowSums(.) != 0)        

GIFTs_community_core <- to.community(GIFTs_elements_filtered[rownames(genome_counts_core),],genome_counts_core,GIFT_db) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="sample") %>% 
  pivot_longer(!sample,names_to = "gift",values_to = "value")

GIFTs_community_endemic <- to.community(GIFTs_elements_filtered[rownames(genome_counts_endemic),],genome_counts_endemic,GIFT_db) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="sample") %>% 
  pivot_longer(!sample,names_to = "gift",values_to = "value")

GIFTs_community_marginal <- to.community(GIFTs_elements_filtered[rownames(genome_counts_marginal),],genome_counts_marginal,GIFT_db) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="sample") %>% 
  pivot_longer(!sample,names_to = "gift",values_to = "value")
GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)
# A tibble: 0 × 5
# ℹ 5 variables: gift <chr>, high <dbl>, low <dbl>, p_value <dbl>, p_adjust <dbl>
GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>%
    rename(trait=gift,gift=value) %>% 
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ environment, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=8),
              strip.text.y = element_text(angle = 0)
              ) +
        labs(y="Traits",x="Samples",fill="GIFT")

GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=sample,y=mci)) +
        geom_col() +
        ylim(0,0.5) +
        facet_grid(. ~ environment, scales="free",space="free")+
        theme_minimal()

GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=environment,y=mci, group=environment)) +
        geom_boxplot() +
        ylim(0,0.5) +
        theme_minimal()

GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>% 
    wilcox.test(mci ~ environment,.)

    Wilcoxon rank sum exact test

data:  mci by environment
W = 55, p-value = 0.01643
alternative hypothesis: true location shift is not equal to 0
GIFTs_community_core_significance <- GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    mutate(difference=high-low) %>%
    mutate(significance = case_when(
      p_adjust <= 0.05 ~ "significant",
      p_adjust > 0.05 ~ "non-significant")) 

GIFTs_community_core_significance %>% 
      filter(significance == "significant")
# A tibble: 0 × 7
# ℹ 7 variables: gift <chr>, high <dbl>, low <dbl>, p_value <dbl>, p_adjust <dbl>, difference <dbl>, significance <chr>
GIFTs_community_core_significance %>% 
    ggplot(aes(y=forcats::fct_rev(factor(gift)),x=difference, fill=significance)) +
      scale_fill_manual(values=c("#cccccc","#444444"))+
      geom_col() + 
      xlim(-0.25,0.25) +
      theme_minimal()

community_core_dist <- GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist()
  
adonis2(community_core_dist ~ environment * river,
        by="terms", 
        data = sample_metadata %>%
                filter(sample %in% labels(community_core_dist)) %>% 
                arrange(match(sample,labels(community_core_dist))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
environment 1 0.7158447 0.06965384 2.136654 0.047
river 2 0.8505330 0.08275942 1.269336 0.226
Residual 26 8.7107971 0.84758674 NA NA
Total 29 10.2771749 1.00000000 NA NA
pcoa_core <- GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    vegdist(method="euclidean") %>%
    pcoa()

pcoa_core_rel_eigen <- pcoa_core$values$Relative_eig[1:10]

pcoa_core_vectors <- pcoa_core$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2)

pcoa_core_eigenvalues <- pcoa_core$values$Eigenvalues[c(1,2)]

pcoa_core_traits <- cov(GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"), scale(pcoa_core_vectors)) %*% diag((pcoa_core_eigenvalues/(nrow(GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"))-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(!label %in% c("S01","S02","S03"))

scale <- 15 # scale for vector loadings
pcoa_core_vectors %>% 
  rownames_to_column(var="sample") %>% 
  left_join(sample_metadata, by="sample") %>%
  ggplot() +
      #genome positions
      geom_point(aes(x=Axis.1,y=Axis.2, color=environment), 
                 alpha=0.9, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.1,5)) +
      #loading positions
      geom_segment(data=pcoa_core_traits, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(pcoa_core_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(pcoa_core_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = pcoa_core_traits,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    theme_minimal()

GIFTs_community_core %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist() %>% 
    vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>% 
    vegan::scores() %>%
    as_tibble(., rownames = "sample") %>% 
    left_join(sample_metadata, by = "sample") %>%
      group_by(environment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = environment)) +
      geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = treatment_colors) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="Altitude", shape="River")+
    geom_text_repel(aes(label = sample), size=3)

10.1.6.2 Endemic

GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>%
    rename(trait=gift,gift=value) %>% 
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ environment, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=8),
              strip.text.y = element_text(angle = 0)
              ) +
        labs(y="Traits",x="Samples",fill="GIFT")

GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=sample,y=mci)) +
        geom_col() +
        facet_grid(. ~ environment, scales="free",space="free")+
        theme_minimal()

GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=environment,y=mci, group=environment)) +
        geom_boxplot() +
        ylim(0,0.5) +
        theme_minimal()

GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>% 
    wilcox.test(mci ~ environment,.)

    Wilcoxon rank sum exact test

data:  mci by environment
W = 24, p-value = 8.979e-05
alternative hypothesis: true location shift is not equal to 0
GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05) %>% 
    tt()
gift high low p_value p_adjust
B0104 0.318371850 0.477248051 4.266185e-03 8.738798e-03
B0106 0.652417204 0.750373702 2.349615e-02 3.825655e-02
B0204 0.272751511 0.411834564 9.967024e-04 2.751765e-03
B0205 0.349827863 0.607824698 2.150627e-04 7.803705e-04
B0207 0.349413509 0.607091324 8.978999e-05 4.072617e-04
B0208 0.277418167 0.571441654 1.125340e-04 4.610265e-04
B0209 0.418950756 0.654470027 1.665215e-03 3.916339e-03
B0210 0.314626667 0.599130873 2.701874e-03 5.815898e-03
B0211 0.505796266 0.763739750 2.514223e-06 6.450676e-05
B0212 0.341025004 0.546199073 4.939623e-03 9.802064e-03
B0213 0.415890582 0.550634314 1.643172e-02 2.782438e-02
B0215 0.255319614 0.554385824 5.777813e-04 1.834456e-03
B0216 0.105448827 0.426833768 2.041033e-05 1.993932e-04
B0217 0.281630700 0.400857555 6.955501e-04 2.154509e-03
B0220 0.108696049 0.075493630 9.874823e-03 1.717949e-02
B0221 0.255637379 0.383346961 9.874823e-03 1.717949e-02
B0303 0.168217937 0.258794974 2.643028e-04 8.833279e-04
B0309 0.028042752 0.096603777 7.399332e-03 1.408868e-02
B0310 0.000000000 0.039314500 8.257612e-03 1.519879e-02
B0402 0.556281087 0.493520756 1.408119e-03 3.804919e-03
B0601 0.382208213 0.577022449 2.654761e-05 2.408248e-04
B0602 0.506114317 0.745114749 1.403581e-04 5.401660e-04
B0603 0.260255972 0.467609340 5.618966e-05 3.161839e-04
B0701 0.432395583 0.679531792 8.978999e-05 4.072617e-04
B0703 0.022340365 0.156111414 5.726164e-05 3.161839e-04
B0704 0.312937738 0.598325101 3.432236e-05 2.564082e-04
B0705 0.184992803 0.398754859 1.554950e-05 1.645656e-04
B0706 0.336719078 0.509754909 7.543545e-03 1.408868e-02
B0707 0.584533168 0.687871927 7.543545e-03 1.408868e-02
B0710 0.016909779 0.108809582 1.581108e-03 3.916339e-03
B0711 0.208154526 0.416638817 7.121052e-05 3.617495e-04
B0712 0.099046619 0.204586247 5.703508e-03 1.114378e-02
B0804 0.456763954 0.763616891 1.177172e-05 1.359099e-04
B0805 0.042739966 0.173618219 5.802053e-07 4.360009e-05
B0901 0.100157586 0.041913672 1.665215e-03 3.916339e-03
B0903 0.000000000 0.019080084 2.539636e-06 6.450676e-05
B1004 0.125149306 0.140982835 8.964396e-03 1.603490e-02
B1012 0.001356128 0.019240248 1.619167e-04 6.048064e-04
B1014 0.022601362 0.000000000 2.539636e-06 6.450676e-05
D0104 0.058683884 0.159932473 2.306458e-03 5.050347e-03
D0201 0.092234219 0.260028653 8.340902e-04 2.407488e-03
D0202 0.160795013 0.371123533 5.618966e-05 3.161839e-04
D0203 0.342605111 0.471551663 3.432236e-05 2.564082e-04
D0204 0.250243216 0.395633777 2.306458e-03 5.050347e-03
D0205 0.063752861 0.169920437 7.121052e-05 3.617495e-04
D0206 0.147551315 0.443090128 5.618966e-05 3.161839e-04
D0207 0.412156668 0.558207530 2.090231e-02 3.447523e-02
D0208 0.155823369 0.301738922 5.618966e-05 3.161839e-04
D0209 0.131997770 0.352678185 6.549873e-06 9.242598e-05
D0210 0.133702102 0.284613983 1.177172e-05 1.359099e-04
D0212 0.133552981 0.422562064 6.549873e-06 9.242598e-05
D0213 0.126218478 0.306630447 8.340902e-04 2.407488e-03
D0305 0.366492686 0.423996527 4.939623e-03 9.802064e-03
D0306 0.146105878 0.434641186 6.549873e-06 9.242598e-05
D0501 0.790847790 0.872992541 1.805380e-02 3.016885e-02
D0502 0.480358478 0.285918054 4.781407e-04 1.557022e-03
D0503 0.125879327 0.000000000 6.866156e-07 4.360009e-05
D0505 0.286597240 0.350858031 8.641809e-03 1.567871e-02
D0506 0.211360464 0.391670686 6.549873e-06 9.242598e-05
D0507 0.110052668 0.210004157 1.125340e-04 4.610265e-04
D0509 0.440041988 0.526487614 1.125601e-02 1.931774e-02
D0510 0.000000000 0.013573683 2.308808e-04 7.924829e-04
D0511 0.611751724 0.767405683 1.665215e-03 3.916339e-03
D0513 0.224858211 0.438902945 3.155375e-03 6.678878e-03
D0601 0.034174614 0.141369358 2.997491e-02 4.758517e-02
D0602 0.000000000 0.030499587 2.308808e-04 7.924829e-04
D0606 0.003034342 0.043637689 5.152105e-05 3.161839e-04
D0607 0.008552574 0.041697052 2.096373e-03 4.840715e-03
D0608 0.000000000 0.002444638 3.625355e-03 7.547870e-03
D0609 0.158687978 0.084565489 1.403581e-04 5.401660e-04
D0610 0.000000000 0.007160308 1.522953e-03 3.916339e-03
D0702 0.455688142 0.326464654 2.635404e-02 4.236663e-02
D0705 0.323241782 0.258159545 2.306458e-03 5.050347e-03
D0706 0.059976159 0.148351231 1.665215e-03 3.916339e-03
D0807 0.015074471 0.041747020 8.978999e-05 4.072617e-04
D0816 0.043641253 0.091390659 8.340902e-04 2.407488e-03
D0817 0.005827776 0.019925548 1.445050e-03 3.823361e-03
D0907 0.162914556 0.328681837 9.967024e-04 2.751765e-03
D0908 0.305403136 0.623522292 3.432236e-05 2.564082e-04
D0910 0.124407773 0.396891803 1.125340e-04 4.610265e-04
GIFTs_community_endemic_significance <- GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    mutate(difference=high-low) %>%
    mutate(significance = case_when(
      p_adjust <= 0.05 ~ "significant",
      p_adjust > 0.05 ~ "non-significant")) 

GIFTs_community_endemic_significance %>% 
      filter(significance == "significant") %>% 
      tt()
gift high low p_value p_adjust difference significance
B0104 0.318371850 0.477248051 4.266185e-03 8.738798e-03 -0.158876201 significant
B0106 0.652417204 0.750373702 2.349615e-02 3.825655e-02 -0.097956497 significant
B0204 0.272751511 0.411834564 9.967024e-04 2.751765e-03 -0.139083053 significant
B0205 0.349827863 0.607824698 2.150627e-04 7.803705e-04 -0.257996835 significant
B0207 0.349413509 0.607091324 8.978999e-05 4.072617e-04 -0.257677815 significant
B0208 0.277418167 0.571441654 1.125340e-04 4.610265e-04 -0.294023487 significant
B0209 0.418950756 0.654470027 1.665215e-03 3.916339e-03 -0.235519272 significant
B0210 0.314626667 0.599130873 2.701874e-03 5.815898e-03 -0.284504206 significant
B0211 0.505796266 0.763739750 2.514223e-06 6.450676e-05 -0.257943484 significant
B0212 0.341025004 0.546199073 4.939623e-03 9.802064e-03 -0.205174069 significant
B0213 0.415890582 0.550634314 1.643172e-02 2.782438e-02 -0.134743732 significant
B0215 0.255319614 0.554385824 5.777813e-04 1.834456e-03 -0.299066210 significant
B0216 0.105448827 0.426833768 2.041033e-05 1.993932e-04 -0.321384940 significant
B0217 0.281630700 0.400857555 6.955501e-04 2.154509e-03 -0.119226854 significant
B0220 0.108696049 0.075493630 9.874823e-03 1.717949e-02 0.033202420 significant
B0221 0.255637379 0.383346961 9.874823e-03 1.717949e-02 -0.127709582 significant
B0303 0.168217937 0.258794974 2.643028e-04 8.833279e-04 -0.090577037 significant
B0309 0.028042752 0.096603777 7.399332e-03 1.408868e-02 -0.068561026 significant
B0310 0.000000000 0.039314500 8.257612e-03 1.519879e-02 -0.039314500 significant
B0402 0.556281087 0.493520756 1.408119e-03 3.804919e-03 0.062760331 significant
B0601 0.382208213 0.577022449 2.654761e-05 2.408248e-04 -0.194814235 significant
B0602 0.506114317 0.745114749 1.403581e-04 5.401660e-04 -0.239000432 significant
B0603 0.260255972 0.467609340 5.618966e-05 3.161839e-04 -0.207353368 significant
B0701 0.432395583 0.679531792 8.978999e-05 4.072617e-04 -0.247136209 significant
B0703 0.022340365 0.156111414 5.726164e-05 3.161839e-04 -0.133771050 significant
B0704 0.312937738 0.598325101 3.432236e-05 2.564082e-04 -0.285387364 significant
B0705 0.184992803 0.398754859 1.554950e-05 1.645656e-04 -0.213762056 significant
B0706 0.336719078 0.509754909 7.543545e-03 1.408868e-02 -0.173035831 significant
B0707 0.584533168 0.687871927 7.543545e-03 1.408868e-02 -0.103338760 significant
B0710 0.016909779 0.108809582 1.581108e-03 3.916339e-03 -0.091899802 significant
B0711 0.208154526 0.416638817 7.121052e-05 3.617495e-04 -0.208484291 significant
B0712 0.099046619 0.204586247 5.703508e-03 1.114378e-02 -0.105539628 significant
B0804 0.456763954 0.763616891 1.177172e-05 1.359099e-04 -0.306852936 significant
B0805 0.042739966 0.173618219 5.802053e-07 4.360009e-05 -0.130878253 significant
B0901 0.100157586 0.041913672 1.665215e-03 3.916339e-03 0.058243913 significant
B0903 0.000000000 0.019080084 2.539636e-06 6.450676e-05 -0.019080084 significant
B1004 0.125149306 0.140982835 8.964396e-03 1.603490e-02 -0.015833529 significant
B1012 0.001356128 0.019240248 1.619167e-04 6.048064e-04 -0.017884120 significant
B1014 0.022601362 0.000000000 2.539636e-06 6.450676e-05 0.022601362 significant
D0104 0.058683884 0.159932473 2.306458e-03 5.050347e-03 -0.101248589 significant
D0201 0.092234219 0.260028653 8.340902e-04 2.407488e-03 -0.167794434 significant
D0202 0.160795013 0.371123533 5.618966e-05 3.161839e-04 -0.210328521 significant
D0203 0.342605111 0.471551663 3.432236e-05 2.564082e-04 -0.128946551 significant
D0204 0.250243216 0.395633777 2.306458e-03 5.050347e-03 -0.145390561 significant
D0205 0.063752861 0.169920437 7.121052e-05 3.617495e-04 -0.106167576 significant
D0206 0.147551315 0.443090128 5.618966e-05 3.161839e-04 -0.295538813 significant
D0207 0.412156668 0.558207530 2.090231e-02 3.447523e-02 -0.146050862 significant
D0208 0.155823369 0.301738922 5.618966e-05 3.161839e-04 -0.145915553 significant
D0209 0.131997770 0.352678185 6.549873e-06 9.242598e-05 -0.220680415 significant
D0210 0.133702102 0.284613983 1.177172e-05 1.359099e-04 -0.150911881 significant
D0212 0.133552981 0.422562064 6.549873e-06 9.242598e-05 -0.289009083 significant
D0213 0.126218478 0.306630447 8.340902e-04 2.407488e-03 -0.180411970 significant
D0305 0.366492686 0.423996527 4.939623e-03 9.802064e-03 -0.057503841 significant
D0306 0.146105878 0.434641186 6.549873e-06 9.242598e-05 -0.288535308 significant
D0501 0.790847790 0.872992541 1.805380e-02 3.016885e-02 -0.082144750 significant
D0502 0.480358478 0.285918054 4.781407e-04 1.557022e-03 0.194440424 significant
D0503 0.125879327 0.000000000 6.866156e-07 4.360009e-05 0.125879327 significant
D0505 0.286597240 0.350858031 8.641809e-03 1.567871e-02 -0.064260791 significant
D0506 0.211360464 0.391670686 6.549873e-06 9.242598e-05 -0.180310222 significant
D0507 0.110052668 0.210004157 1.125340e-04 4.610265e-04 -0.099951489 significant
D0509 0.440041988 0.526487614 1.125601e-02 1.931774e-02 -0.086445625 significant
D0510 0.000000000 0.013573683 2.308808e-04 7.924829e-04 -0.013573683 significant
D0511 0.611751724 0.767405683 1.665215e-03 3.916339e-03 -0.155653959 significant
D0513 0.224858211 0.438902945 3.155375e-03 6.678878e-03 -0.214044734 significant
D0601 0.034174614 0.141369358 2.997491e-02 4.758517e-02 -0.107194744 significant
D0602 0.000000000 0.030499587 2.308808e-04 7.924829e-04 -0.030499587 significant
D0606 0.003034342 0.043637689 5.152105e-05 3.161839e-04 -0.040603346 significant
D0607 0.008552574 0.041697052 2.096373e-03 4.840715e-03 -0.033144478 significant
D0608 0.000000000 0.002444638 3.625355e-03 7.547870e-03 -0.002444638 significant
D0609 0.158687978 0.084565489 1.403581e-04 5.401660e-04 0.074122489 significant
D0610 0.000000000 0.007160308 1.522953e-03 3.916339e-03 -0.007160308 significant
D0702 0.455688142 0.326464654 2.635404e-02 4.236663e-02 0.129223488 significant
D0705 0.323241782 0.258159545 2.306458e-03 5.050347e-03 0.065082237 significant
D0706 0.059976159 0.148351231 1.665215e-03 3.916339e-03 -0.088375073 significant
D0807 0.015074471 0.041747020 8.978999e-05 4.072617e-04 -0.026672549 significant
D0816 0.043641253 0.091390659 8.340902e-04 2.407488e-03 -0.047749406 significant
D0817 0.005827776 0.019925548 1.445050e-03 3.823361e-03 -0.014097772 significant
D0907 0.162914556 0.328681837 9.967024e-04 2.751765e-03 -0.165767281 significant
D0908 0.305403136 0.623522292 3.432236e-05 2.564082e-04 -0.318119156 significant
D0910 0.124407773 0.396891803 1.125340e-04 4.610265e-04 -0.272484030 significant
GIFTs_community_endemic_significance %>% 
    ggplot(aes(y=forcats::fct_rev(factor(gift)),x=difference, fill=significance)) +
      scale_fill_manual(values=c("#cccccc","#444444"))+
      geom_col() + 
      xlim(-0.25,0.25) +
      theme_minimal()

community_endemic_dist <- GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist()
  
adonis2(community_endemic_dist ~ environment * river,
        by="terms", 
        data = sample_metadata %>%
                filter(sample %in% labels(community_endemic_dist)) %>% 
                arrange(match(sample,labels(community_endemic_dist))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
environment 1 18.732917 0.27521376 11.099093 0.001
river 2 5.451379 0.08008868 1.614948 0.151
Residual 26 43.882492 0.64469756 NA NA
Total 29 68.066788 1.00000000 NA NA
pcoa_endemic <- GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    vegdist(method="euclidean") %>%
    pcoa()

pcoa_endemic_rel_eigen <- pcoa_endemic$values$Relative_eig[1:10]

pcoa_endemic_vectors <- pcoa_endemic$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2)

pcoa_endemic_eigenvalues <- pcoa_endemic$values$Eigenvalues[c(1,2)]

pcoa_endemic_traits <- cov(GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"), scale(pcoa_endemic_vectors)) %*% diag((pcoa_endemic_eigenvalues/(nrow(GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"))-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(!label %in% c("S01","S02","S03"))

scale <- 15 # scale for vector loadings
pcoa_endemic_vectors %>% 
  rownames_to_column(var="sample") %>% 
  left_join(sample_metadata, by="sample") %>%
  ggplot() +
      #genome positions
      geom_point(aes(x=Axis.1,y=Axis.2, color=environment), 
                 alpha=0.9, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.1,5)) +
      #loading positions
      geom_segment(data=pcoa_endemic_traits, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(pcoa_endemic_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(pcoa_endemic_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = pcoa_endemic_traits,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    theme_minimal() 

GIFTs_community_endemic %>%
    filter(sample!="EHI02625") %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist() %>% 
    vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>% 
    vegan::scores() %>%
    as_tibble(., rownames = "sample") %>% 
    left_join(sample_metadata, by = "sample") %>%
      group_by(environment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = environment)) +
      geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = treatment_colors) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="Altitude", shape="River")+
    geom_text_repel(aes(label = sample), size=3)

10.1.6.3 Marginal

GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>%
    rename(trait=gift,gift=value) %>% 
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ environment, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=8),
              strip.text.y = element_text(angle = 0)
              ) +
        labs(y="Traits",x="Samples",fill="GIFT")

GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=sample,y=mci)) +
        geom_col() +
        ylim(0,0.5) +
        facet_grid(. ~ environment, scales="free",space="free")+
        theme_minimal()

GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>%
    ggplot(aes(x=environment,y=mci, group=environment)) +
        geom_boxplot() +
        ylim(0,0.5) +
        theme_minimal()

GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    group_by(sample) %>% 
    summarise(mci=mean(value,na.rm=TRUE)) %>% 
    left_join(sample_metadata, by = "sample") %>% 
    wilcox.test(mci ~ environment,.)

    Wilcoxon rank sum exact test

data:  mci by environment
W = 56, p-value = 0.03277
alternative hypothesis: true location shift is not equal to 0
GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)
# A tibble: 33 × 5
   gift     high    low  p_value p_adjust
   <chr>   <dbl>  <dbl>    <dbl>    <dbl>
 1 B0221 0.177   0.527  0.00268    0.0297
 2 B0605 0.0886  0.396  0.00663    0.0371
 3 B0709 0       0.0329 0.000980   0.0196
 4 B1028 0.00776 0.0891 0.00224    0.0297
 5 D0101 0.00637 0.0634 0.00261    0.0297
 6 D0103 0.0689  0.216  0.00839    0.0435
 7 D0201 0.0589  0.252  0.00276    0.0297
 8 D0202 0.110   0.323  0.00820    0.0435
 9 D0203 0.214   0.424  0.00932    0.0450
10 D0204 0.225   0.483  0.00631    0.0368
# ℹ 23 more rows
GIFTs_community_marginal_significance <- GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    left_join(sample_metadata, by = "sample") %>% 
    group_by(gift) %>%
    summarise(high=mean(value[environment == "high"], na.rm = TRUE),
              low=mean(value[environment == "low"], na.rm = TRUE),
              p_value = wilcox.test(value ~ environment)$p.value
              ) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    mutate(difference=high-low) %>%
    mutate(significance = case_when(
      p_adjust <= 0.05 ~ "significant",
      p_adjust > 0.05 ~ "non-significant")) 

GIFTs_community_marginal_significance %>% 
      filter(significance == "significant") %>% 
      tt()
gift high low p_value p_adjust difference significance
B0221 0.177144655 0.52701883 2.677235e-03 0.029723307 -0.34987417 significant
B0605 0.088604745 0.39623709 6.627165e-03 0.037112124 -0.30763234 significant
B0709 0.000000000 0.03290746 9.803796e-04 0.019607591 -0.03290746 significant
B1028 0.007758061 0.08906279 2.236734e-03 0.029723307 -0.08130473 significant
D0101 0.006371054 0.06336166 2.607633e-03 0.029723307 -0.05699060 significant
D0103 0.068861705 0.21552103 8.387951e-03 0.043493078 -0.14665933 significant
D0201 0.058876923 0.25212032 2.760021e-03 0.029723307 -0.19324339 significant
D0202 0.110126355 0.32295102 8.200669e-03 0.043493078 -0.21282467 significant
D0203 0.213841081 0.42380897 9.322462e-03 0.045004989 -0.20996789 significant
D0204 0.225490303 0.48319440 6.306083e-03 0.036785487 -0.25770410 significant
D0205 0.064765347 0.17098720 9.226787e-03 0.045004989 -0.10622185 significant
D0208 0.113400443 0.25966177 4.235560e-03 0.032943248 -0.14626132 significant
D0209 0.101669651 0.28432354 1.758623e-03 0.027356358 -0.18265389 significant
D0212 0.172471768 0.29920957 1.048346e-02 0.046042591 -0.12673780 significant
D0213 0.094273805 0.26659680 3.217419e-03 0.030029244 -0.17232300 significant
D0301 0.000000000 0.07340385 6.096424e-03 0.036785487 -0.07340385 significant
D0302 0.004398428 0.08700617 2.275440e-04 0.006371233 -0.08260774 significant
D0304 0.009925215 0.22320473 5.606955e-05 0.002616579 -0.21327951 significant
D0307 0.015936692 0.24428734 1.718478e-04 0.006014672 -0.22835064 significant
D0308 0.119232778 0.32375506 5.354418e-03 0.036785487 -0.20452228 significant
D0502 0.059489928 0.55204220 1.708261e-05 0.001195782 -0.49255228 significant
D0503 0.018551988 0.14195916 1.085290e-02 0.046042591 -0.12340717 significant
D0509 0.273367616 0.53084131 1.054085e-02 0.046042591 -0.25747370 significant
D0518 0.092542585 0.27773775 4.091025e-03 0.032943248 -0.18519517 significant
D0603 0.000000000 0.10245182 6.096424e-03 0.036785487 -0.10245182 significant
D0607 0.009275994 0.10179302 6.834773e-04 0.015947804 -0.09251702 significant
D0611 0.000000000 0.14970317 6.096424e-03 0.036785487 -0.14970317 significant
D0704 0.163489399 0.43901684 3.994756e-03 0.032943248 -0.27552744 significant
D0706 0.003315462 0.10115430 5.202430e-03 0.036785487 -0.09783884 significant
D0807 0.006303428 0.21451084 1.251696e-05 0.001195782 -0.20820742 significant
D0816 0.035535517 0.22732398 3.217419e-03 0.030029244 -0.19178846 significant
D0817 0.002166158 0.01700964 1.492190e-03 0.026113324 -0.01484348 significant
D0904 0.095233051 0.00000000 1.053795e-02 0.046042591 0.09523305 significant
GIFTs_community_marginal_significance %>% 
    ggplot(aes(y=forcats::fct_rev(factor(gift)),x=difference, fill=significance)) +
      scale_fill_manual(values=c("#cccccc","#444444"))+
      geom_col() + 
      xlim(-0.25,0.25) +
      theme_minimal()

community_marginal_dist <- GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    filter(!is.na(value)) %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist()
  
adonis2(community_marginal_dist ~ environment * river,
        by="terms", 
        data = sample_metadata %>%
                filter(sample %in% labels(community_marginal_dist)) %>% 
                arrange(match(sample,labels(community_marginal_dist))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
environment 1 22.66237 0.11067970 3.415318 0.024
river 2 16.20626 0.07914899 1.221177 0.274
Residual 25 165.88774 0.81017131 NA NA
Total 28 204.75637 1.00000000 NA NA
pcoa_marginal <- GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    filter(!is.na(value)) %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    vegdist(method="euclidean") %>%
    pcoa()

pcoa_marginal_rel_eigen <- pcoa_marginal$values$Relative_eig[1:10]

pcoa_marginal_vectors <- pcoa_marginal$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2)

pcoa_marginal_eigenvalues <- pcoa_marginal$values$Eigenvalues[c(1,2)]

pcoa_marginal_traits <- cov(GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% filter(!is.na(value)) %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"), scale(pcoa_marginal_vectors)) %*% diag((pcoa_marginal_eigenvalues/(nrow(GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% filter(!is.na(value)) %>%  
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample"))-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(!label %in% c("S01","S02","S03"))

scale <- 15 # scale for vector loadings
pcoa_marginal_vectors %>% 
  rownames_to_column(var="sample") %>% 
  left_join(sample_metadata, by="sample") %>%
  ggplot() +
      #genome positions
      geom_point(aes(x=Axis.1,y=Axis.2, color=environment), 
                 alpha=0.9, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.1,5)) +
      #loading positions
      geom_segment(data=pcoa_marginal_traits, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(pcoa_marginal_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(pcoa_marginal_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = pcoa_marginal_traits,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    theme_minimal() + 
    theme(legend.position = "none")

GIFTs_community_marginal %>%
    filter(sample!="EHI02625") %>% 
    filter(!is.na(value)) %>% 
    pivot_wider(names_from="gift",values_from="value") %>% 
    column_to_rownames(var="sample") %>%
    dist() %>% 
    vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>% 
    vegan::scores() %>%
    as_tibble(., rownames = "sample") %>% 
    left_join(sample_metadata, by = "sample") %>%
      group_by(environment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = environment)) +
      geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = treatment_colors) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="Altitude", shape="River")+
    geom_text_repel(aes(label = sample), size=3)

10.2 Skin microbiome

# A tibble: 3 × 2
  type_river count
  <chr>      <int>
1 core          25
2 endemic        7
3 marginal      11
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

10.2.1 Taxonomic variation

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  group_by(type,sample) %>% 
  summarise(fraction=sum(count)) %>% 
  group_by(type) %>% 
  summarise(mean(fraction))
`summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
# A tibble: 3 × 2
  type     `mean(fraction)`
  <chr>               <dbl>
1 core               0.901 
2 endemic            0.132 
3 marginal           0.0874

10.2.2 Genome size

# A tibble: 3 × 2
  type     genome_size
  <chr>          <dbl>
1 core        3337341.
2 endemic     3753149.
3 marginal    3281875.
# A tibble: 3 × 9
  .y.    group1  group2      n1    n2 statistic     p p.adj p.adj.signif
* <chr>  <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>       
1 length core    endemic     25     7        69 0.42  0.973 ns          
2 length core    marginal    25    11       139 0.973 0.973 ns          
3 length endemic marginal     7    11        44 0.659 0.973 ns          

### Functional differences between fractions

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>%
    ggplot(aes(x=type, y=value, group=type))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type, p.adjust.method = "BH")
# A tibble: 3 × 10
  trait .y.   group1  group2      n1    n2 statistic     p p.adj p.adj.signif
* <chr> <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>       
1 mci   value core    endemic     23     7        72 0.701 0.701 ns          
2 mci   value core    marginal    23    10       177 0.014 0.042 *           
3 mci   value endemic marginal     7    10        53 0.088 0.132 ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type, p.adjust.method = "BH") %>% 
    print(n=100)
# A tibble: 42 × 10
   trait .y.   group1  group2      n1    n2 statistic     p p.adj p.adj.signif
 * <chr> <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>       
 1 B01   value core    endemic     23     7      57.5 0.27  0.27  ns          
 2 B01   value core    marginal    23    10     168.  0.038 0.057 ns          
 3 B01   value endemic marginal     7    10      61   0.01  0.029 *           
 4 B02   value core    endemic     23     7      69   0.598 0.598 ns          
 5 B02   value core    marginal    23    10     173   0.024 0.073 ns          
 6 B02   value endemic marginal     7    10      53   0.088 0.131 ns          
 7 B03   value core    endemic     23     7      88   0.73  0.73  ns          
 8 B03   value core    marginal    23    10     160.  0.08  0.24  ns          
 9 B03   value endemic marginal     7    10      44   0.399 0.599 ns          
10 B04   value core    endemic     23     7      92.5 0.572 0.623 ns          
11 B04   value core    marginal    23    10     150.  0.182 0.546 ns          
12 B04   value endemic marginal     7    10      40.5 0.623 0.623 ns          
13 B06   value core    endemic     23     7      79   0.961 0.961 ns          
14 B06   value core    marginal    23    10     153   0.142 0.256 ns          
15 B06   value endemic marginal     7    10      49.5 0.171 0.256 ns          
16 B07   value core    endemic     23     7      65   0.471 0.471 ns          
17 B07   value core    marginal    23    10     174   0.022 0.042 *           
18 B07   value endemic marginal     7    10      58   0.028 0.042 *           
19 B08   value core    endemic     23     7      54   0.202 0.202 ns          
20 B08   value core    marginal    23    10     171   0.029 0.044 *           
21 B08   value endemic marginal     7    10      62   0.01  0.029 *           
22 D01   value core    endemic     23     7      98.5 0.39  0.585 ns          
23 D01   value core    marginal    23    10     172.  0.028 0.084 ns          
24 D01   value endemic marginal     7    10      39   0.73  0.73  ns          
25 D02   value core    endemic     23     7      66.5 0.508 0.508 ns          
26 D02   value core    marginal    23    10     168.  0.038 0.113 ns          
27 D02   value endemic marginal     7    10      53   0.086 0.13  ns          
28 D03   value core    endemic     23     7      84   0.883 0.883 ns          
29 D03   value core    marginal    23    10     182.  0.01  0.029 *           
30 D03   value endemic marginal     7    10      56.5 0.038 0.057 ns          
31 D05   value core    endemic     23     7      90   0.666 0.666 ns          
32 D05   value core    marginal    23    10     169   0.034 0.104 ns          
33 D05   value endemic marginal     7    10      46   0.315 0.473 ns          
34 D06   value core    endemic     23     7      64.5 0.447 0.447 ns          
35 D06   value core    marginal    23    10     163   0.062 0.092 ns          
36 D06   value endemic marginal     7    10      57   0.034 0.092 ns          
37 D07   value core    endemic     23     7      80.5 1     1     ns          
38 D07   value core    marginal    23    10     180.  0.012 0.036 *           
39 D07   value endemic marginal     7    10      51.5 0.11  0.165 ns          
40 D09   value core    endemic     23     7      93   0.537 0.806 ns          
41 D09   value core    marginal    23    10     136.  0.387 0.806 ns          
42 D09   value endemic marginal     7    10      35   1     1     ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type),by="genome") %>%
    pivot_longer(!c(genome,type),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type)) %>% 
    ggplot(aes(x=type, y=value, group=type))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

10.2.3 Functional differences between high and low endemisms

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment)) %>%
    ggplot(aes(x=type_prevalence_environment, y=value, group=type_prevalence_environment))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    to.domains(., GIFT_db) %>%
    as.data.frame() %>%
    mutate(mci=(Biosynthesis+Degradation)/2) %>% 
    rownames_to_column(var="genome") %>% 
    select(genome,mci) %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment)) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH")
# A tibble: 1 × 10
  trait .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
* <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
1 mci   value high   low        5     2         2 0.381 0.381 ns          
genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    ggplot(aes(x=type_prevalence_environment, y=value, group=type_prevalence_environment))+
      geom_boxplot()+
      facet_grid(~trait,  scales="free")

genome_gifts %>%
    to.elements(., GIFT_db) %>%
    to.functions(., GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    left_join(core_microbiota %>% select(genome,type_prevalence_environment, type),by="genome") %>%
    filter(type == "endemic") %>% 
    select(-type) %>% 
    pivot_longer(!c(genome,type_prevalence_environment),names_to = "trait",values_to = "value") %>% 
    filter(!is.na(type_prevalence_environment),
           trait %in% c("B01","B02","B03","B04","B06","B07","B08","D01","D02","D03","D05","D06","D07","D09")) %>%
    group_by(trait) %>%
    pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH") %>% 
    print(n=100)
# A tibble: 14 × 10
   trait .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
 * <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
 1 B01   value high   low        5     2       2   0.381 0.381 ns          
 2 B02   value high   low        5     2       2   0.381 0.381 ns          
 3 B03   value high   low        5     2       5   1     1     ns          
 4 B04   value high   low        5     2       4   0.857 0.857 ns          
 5 B06   value high   low        5     2       1.5 0.241 0.241 ns          
 6 B07   value high   low        5     2       1   0.19  0.19  ns          
 7 B08   value high   low        5     2       2   0.329 0.329 ns          
 8 D01   value high   low        5     2       1   0.171 0.171 ns          
 9 D02   value high   low        5     2       0   0.079 0.079 ns          
10 D03   value high   low        5     2       0   0.079 0.079 ns          
11 D05   value high   low        5     2       1   0.19  0.19  ns          
12 D06   value high   low        5     2       2   0.381 0.381 ns          
13 D07   value high   low        5     2       2   0.381 0.381 ns          
14 D09   value high   low        5     2       2   0.285 0.285 ns          

10.2.4 MAGs in different locations and shared among locations

10.2.4.1 Core

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="core") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.2.4.2 Endemic

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="endemic") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    ylim(0,1)+
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.2.4.3 Marginal

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="marginal") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    ylim(0,1)+
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

10.2.4.4 Ordination

gift_pcoa <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    vegdist(method="euclidean") %>%
    pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]


# Get genome positions
gift_pcoa_vectors <- gift_pcoa$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2) # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- cov(genome_gifts, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(genome_gifts)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(!label %in% c("S01","S02","S03"))
set.seed(101)
scale <- 20 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(genome_metadata, by="genome") %>%
  ggplot() +
      #genome positions
      scale_color_manual(values=phylum_colors)+
      geom_point(aes(x=Axis.1,y=Axis.2, color=phylum, size=length), 
                 alpha=0.9, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.1,5)) +
      #loading positions
      geom_segment(data=gift_pcoa_gifts, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = gift_pcoa_gifts,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    xlim(-4.5,4.5) + 
    ylim(-3,2.5) +
    theme_minimal() + 
    theme(legend.position = "none")

structural <- core_microbiota %>% 
  mutate(type = case_when(
      (type_prevalence == "endemic") & (type_prevalence_environment == "high") ~ "present_high",
       (type_prevalence == "endemic") & (type_prevalence_environment == "low") ~ "present_low")) %>% 
    filter(!is.na(type)) %>% 
    select(genome,type)

enriched <- ancom_mag_skin_table %>% 
  mutate(type = case_when(
      lfc_environmentlow < 0 ~ "enriched_low",
      lfc_environmentlow >= 0 ~ "enriched_high")) %>% 
  select(genome,type)


gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(bind_rows(structural,enriched), by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  mutate(type=ifelse(is.na(type),"neutral",type)) %>%
  group_by(type) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      scale_size_continuous(range = c(0.1,5)) +
      geom_point(aes(x=Axis.1,y=Axis.2, color=type, size=length), alpha=0.9, shape=16) +
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=type), alpha = 0.5) +
      scale_color_manual(values=c("#5A99D2","#F16144","#B7B7B7","#225072","#8C2C1C"))+
    theme_minimal()

scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(core_microbiota, by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  filter(type != "absent") %>% 
  mutate(shape = case_when(
      type == "core" ~ "core",
      type == "marginal" ~ "marginal",
      type == "endemic" & type_prevalence_environment == "high" ~ "endemic_high",
      type == "endemic" & type_prevalence_environment == "low" ~ "endemic_low"
    )) %>% 
  group_by(shape) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      #genome positions
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=shape), alpha = 0.5, show.legend = FALSE) +
      scale_color_manual(values=c("#9E8F71","#5A99D2","#F16144","#B7B7B7"))+
      theme_minimal() 

   # theme(legend.position = "none")
# A tibble: 33 × 5
   gift   high   low    p_value p_adjust
   <chr> <dbl> <dbl>      <dbl>    <dbl>
 1 B0104 0.681 0.824 0.00578    0.0279  
 2 B0106 0.894 0.747 0.00228    0.0123  
 3 B0208 0.476 0.720 0.000794   0.00631 
 4 B0213 0.582 0.795 0.00000629 0.000788
 5 B0220 0.179 0.311 0.00770    0.0359  
 6 B0302 0.233 0.457 0.00193    0.0108  
 7 B0307 0.370 0.201 0.000193   0.00260 
 8 B0309 0.134 0.269 0.000956   0.00679 
 9 B0402 0.222 0.357 0.000658   0.00631 
10 B0403 0.264 0.398 0.0116     0.0474  
# ℹ 23 more rows

term df SumOfSqs R2 statistic p.value
environment 1 14.464879 0.1785608 6.438403 0.001
river 2 8.130211 0.1003629 1.809402 0.097
Residual 26 58.413069 0.7210764 NA NA
Total 29 81.008159 1.0000000 NA NA

gift high low p_value p_adjust
B0104 0.67421663 1.0000000 0.0018754356 0.005672049
B0105 0.73759193 1.0000000 0.0018754356 0.005672049
B0106 0.80621259 1.0000000 0.0017459182 0.005672049
B0204 0.48537488 0.8606981 0.0145424059 0.028623148
B0206 0.61242517 1.0000000 0.0017937971 0.005672049
B0207 0.43143388 0.7100000 0.0019749754 0.005695278
B0209 0.79386259 1.0000000 0.0018754356 0.005672049
B0210 0.55247656 0.8606981 0.0145424059 0.028623148
B0213 0.61084133 0.8944683 0.0145424059 0.028623148
B0214 0.22485035 0.6834048 0.0219467710 0.041233327
B0218 0.46423032 0.7889365 0.0256749426 0.046819013
B0220 0.07385765 0.7171750 0.0010154015 0.005672049
B0601 0.54689170 0.9155746 0.0079058708 0.017824145
B0602 0.63919238 0.9155746 0.0145424059 0.028623148
B0603 0.66787867 1.0000000 0.0018754356 0.005672049
B0604 0.43445679 1.0000000 0.0018754356 0.005672049
B0605 0.00000000 0.4221269 0.0045906627 0.011161611
B0702 0.66854993 1.0000000 0.0018754356 0.005672049
B0703 0.07420061 0.6700000 0.0004222482 0.005672049
B0704 0.48294837 1.0000000 0.0018754356 0.005672049
B0706 0.41331224 1.0000000 0.0018754356 0.005672049
B0707 0.76551369 1.0000000 0.0018754356 0.005672049
B0710 0.06335476 0.1400000 0.0007537527 0.005672049
B0711 0.14734772 0.2664340 0.0010154015 0.005672049
B0712 0.71438862 0.3700429 0.0020470591 0.005768985
B0803 0.73937330 1.0000000 0.0018754356 0.005672049
B0804 0.52266792 0.8606981 0.0145424059 0.028623148
B1028 0.03147905 0.1400000 0.0014496951 0.005672049
B1029 0.00000000 0.1444683 0.0045906627 0.011161611
D0102 0.33798384 1.0000000 0.0018754356 0.005672049
D0103 0.16414075 0.6700000 0.0063405256 0.014834437
D0104 0.31230836 0.8000000 0.0019585157 0.005695278
D0201 0.06134920 0.8100000 0.0007255498 0.005672049
D0202 0.16943443 0.8037702 0.0010154015 0.005672049
D0203 0.22479340 0.4890978 0.0008505648 0.005672049
D0205 0.03972843 0.3057359 0.0010154015 0.005672049
D0206 0.00000000 0.6115146 0.0002918961 0.005672049
D0207 0.11256387 0.6128250 0.0010154015 0.005672049
D0208 0.13134920 0.4595489 0.0009808556 0.005672049
D0209 0.14134920 0.6079485 0.0010066900 0.005672049
D0210 0.19001248 0.2500000 0.0018754356 0.005672049
D0211 0.00000000 0.2195918 0.0045906627 0.011161611
D0213 0.00000000 0.3093019 0.0002918961 0.005672049
D0302 0.00000000 0.3300000 0.0001880890 0.005672049
D0306 0.11242517 0.5000000 0.0013515402 0.005672049
D0308 0.05621259 0.4422127 0.0007395593 0.005672049
D0309 0.25000000 0.6055317 0.0007325316 0.005672049
D0505 0.21728768 0.5264768 0.0145424059 0.028623148
D0506 0.16892979 0.5000000 0.0018099404 0.005672049
D0507 0.19124896 0.6273362 0.0010154015 0.005672049
D0508 0.08094613 0.2586895 0.0219467710 0.041233327
D0509 0.48650715 0.6444683 0.0223620729 0.041386523
D0511 0.32436673 1.0000000 0.0018754356 0.005672049
D0512 0.22485035 0.6834048 0.0219467710 0.041233327
D0513 0.06335476 0.3709022 0.0010154015 0.005672049
D0601 0.36577691 0.5000000 0.0018754356 0.005672049
D0604 0.00000000 0.4221269 0.0045906627 0.011161611
D0610 0.06917013 0.0000000 0.0051195000 0.012208038
D0611 0.11242517 0.5000000 0.0013515402 0.005672049
D0613 0.87641709 1.0000000 0.0126508551 0.027521158
D0704 0.56941341 1.0000000 0.0018754356 0.005672049
D0708 0.11242517 0.3944683 0.0120900251 0.026770770
D0804 0.67738433 0.0000000 0.0006075647 0.005672049
D0807 0.69205678 0.1544254 0.0020946080 0.005771809
D0813 0.00000000 0.2889365 0.0045906627 0.011161611
D0816 0.09668565 0.4820515 0.0065015311 0.014929442
D0901 0.00000000 0.4221269 0.0045906627 0.011161611
D0907 0.22485035 1.0000000 0.0013515402 0.005672049

term df SumOfSqs R2 statistic p.value
environment 1 49.92847 0.4416516 12.05864 0.001
river 2 13.43533 0.1188447 1.62244 0.187
Residual 12 49.68566 0.4395037 NA NA
Total 15 113.04945 1.0000000 NA NA

# A tibble: 0 × 5
# ℹ 5 variables: gift <chr>, high <dbl>, low <dbl>, p_value <dbl>, p_adjust <dbl>

term df SumOfSqs R2 statistic p.value
environment 1 12.004006 0.2843473 2.144208 0.106
river 1 7.818642 0.1852056 1.396600 0.318
Residual 4 22.393358 0.5304471 NA NA
Total 6 42.216006 1.0000000 NA NA